%% production_table2.m - PRODUCTION run for Table 2 (Bootstrap comparison)
%
% This script runs the FULL Monte Carlo simulation for Table 2 of the paper
% with ALL replications.
%
% MANUSCRIPT SETTINGS (Table 2):
%   - Sample sizes: T ∈ {20, 40, 80}
%   - ARMA(1,1) errors: ρ₁ = θ₁ ∈ {0.5, 0.8}
%   - Intervention timing: ξ ∈ {0.50, 0.75}
%   - Innovation distribution: df ∈ {5, 15, Inf} (t-dist or Gaussian)
%   - MC replications: 5,000
%   - Bootstrap replications: 5,000
%   - Bandwidth: S_T = ⌊0.75 * T^(1/3)⌋
%   - Bootstrap: YES (AIC and BIC lag selection)
%
% PRODUCTION SETTINGS (this script):
%   - MC replications: 5,000 (FULL)
%   - Bootstrap replications: 5,000 (FULL)
%   - All settings match the manuscript exactly
%
% OUTPUT LOCATION:
%   Results/production/table2_run###_YYYYMMDD/df{5,15,Inf}/
%
% ⚠️  WARNING: This will take MANY hours to complete!
%     Run test_table2.m first to verify setup.
%
% ESTIMATED RUNTIME:
%   - With bootstrap: ~30-90+ hours (depending on hardware, 3x for df loop)
%     Bootstrap is VERY computationally intensive!

clear all
close all
clc

%% Automatic run numbering
base_path = fullfile(pwd, '..', 'Results', 'production');

% Find existing run folders
dirs = dir(fullfile(base_path, 'table2_run*'));
run_folders = {dirs([dirs.isdir]).name};

% Extract run numbers and find maximum
run_nums = [];
for i = 1:length(run_folders)
    % Extract run number from folder name (e.g., 'table2_run001_20251120' -> 1)
    tokens = regexp(run_folders{i}, 'table2_run(\d+)_', 'tokens');
    if ~isempty(tokens)
        run_nums(end+1) = str2double(tokens{1}{1});
    end
end

% Determine next run number
if isempty(run_nums)
    run_num = 1;  % First run
else
    run_num = max(run_nums) + 1;  % Increment
end

% Generate run_id with date
date_str = datestr(now, 'yyyymmdd');
run_id = sprintf('run%03d_%s', run_num, date_str);

%% Configuration
% Sample sizes and correlation values (exactly as in manuscript)
time_span = [20, 40, 80];
correlation = [0.5, 0.8];  % Manuscript Table 2: stronger correlations

% Degrees of freedom for innovation distribution
% df = Inf: Gaussian (benchmark), df = 15: moderate tails, df = 5: heavy tails
df_values = [5, 15, Inf];

% Production settings
num_replications = 5000;       % PRODUCTION: full 5,000 MC replications
num_bootstrap = 5000;          % PRODUCTION: full 5,000 bootstrap replications
run_bootstrap = true;          % Table 2 REQUIRES bootstrap

% Output folder
output_folder_name = fullfile(base_path, sprintf('table2_%s', run_id));

% Create output directory if it doesn't exist
if ~exist(output_folder_name, 'dir')
    mkdir(output_folder_name);
end

% Pre-sample types: 'even' (50%), 'long' (75%)
% Note: Table 2 uses ξ ∈ {0.50, 0.75} which corresponds to 'even' and 'long'
pre_sample_types = {'even', 'long'};

%% Display configuration and confirmation
fprintf('========================================\n');
fprintf('PRODUCTION RUN: Table 2 (JAE Manuscript)\n');
fprintf('========================================\n');
fprintf('Run ID: %s\n', run_id);
fprintf('⚠️  WARNING: FULL PRODUCTION RUN WITH BOOTSTRAP\n');
fprintf('MC Replications: %d\n', num_replications);
fprintf('Bootstrap Replications: %d\n', num_bootstrap);
fprintf('Bootstrap: %s\n', mat2str(run_bootstrap));
fprintf('Sample sizes: %s\n', mat2str(time_span));
fprintf('Correlations (ρ₁=θ₁): %s\n', mat2str(correlation));
fprintf('Degrees of freedom: %s\n', mat2str(df_values));
fprintf('Pre-sample types: %s\n', strjoin(pre_sample_types, ', '));
fprintf('Output folder: %s\n', output_folder_name);
fprintf('========================================\n');
fprintf('Total simulation runs: %d\n', ...
    length(df_values) * length(pre_sample_types) * length(correlation) * length(time_span));
fprintf('Estimated time: 30-90+ hours\n');
fprintf('⚠️  Bootstrap makes this VERY slow!\n');
fprintf('========================================\n\n');

% Confirmation prompt
% Confirmation prompt
% response = input('Continue with production run? (y/n): ', 's');
% if ~strcmpi(response, 'y')
%     fprintf('Production run cancelled.\n');
%     return;
% end

%% Run simulations
fprintf('\nStarting production run with bootstrap...\n');
fprintf('This will take a LONG time. Consider running overnight.\n\n');
tic
total_runs = length(df_values) * length(pre_sample_types) * length(correlation) * length(time_span);
current_run = 0;

for df = df_values
    % Create df-specific output folder
    if isinf(df)
        df_folder = fullfile(output_folder_name, 'dfInf');
        df_str = 'Gaussian';
    else
        df_folder = fullfile(output_folder_name, sprintf('df%d', df));
        df_str = sprintf('t(%d)', df);
    end

    if ~exist(df_folder, 'dir')
        mkdir(df_folder);
    end

    fprintf('\n=== Innovation distribution: %s ===\n\n', df_str);

    for pre_sample_type = pre_sample_types
        % Create subfolder for each pre-sample type
        subfolder = fullfile(df_folder, pre_sample_type{1});
        if ~exist(subfolder, 'dir')
            mkdir(subfolder);
        end

        for rho1 = correlation
            for t = time_span
                current_run = current_run + 1;
                run_start = tic;

                % Progress indicator
                fprintf('[%d/%d] Running: df=%s, T=%d, ρ=%g, ξ=%s\n', ...
                    current_run, total_runs, df_str, t, rho1, pre_sample_type{1});
                fprintf('        Bootstrap: %d MC × %d bootstrap = %d total fits\n', ...
                    num_replications, num_bootstrap, num_replications * num_bootstrap);
                fprintf('        (This may take several hours...)\n');

                output_file_name = fullfile(subfolder, ...
                    sprintf('_%g_%d.mat', rho1, t));

                % Call simulation function with optional parameters
                run_simulation_func(t, rho1, output_file_name, pre_sample_type{1}, ...
                    'num_replications', num_replications, ...
                    'run_bootstrap', run_bootstrap, ...
                    'num_bootstrap', num_bootstrap, ...
                    'df', df);

                run_time = toc(run_start);
                remaining_runs = total_runs - current_run;
                est_remaining_time = run_time * remaining_runs;

                fprintf('        Completed in %.2f hours\n', run_time/3600);
                fprintf('        Estimated remaining time: %.2f hours\n\n', ...
                    est_remaining_time/3600);
            end
        end
    end
end

elapsed_time = toc;
fprintf('\n========================================\n');
fprintf('PRODUCTION simulation completed!\n');
fprintf('Total time: %.2f hours (%.2f days)\n', elapsed_time/3600, elapsed_time/86400);
fprintf('Average time per run: %.2f hours\n', elapsed_time/3600/total_runs);
fprintf('========================================\n');

%% Add metadata to all result files
fprintf('\nAdding metadata to result files...\n');
for df = df_values
    % Determine df folder
    if isinf(df)
        df_folder = fullfile(output_folder_name, 'dfInf');
    else
        df_folder = fullfile(output_folder_name, sprintf('df%d', df));
    end

    for pre_sample_type = pre_sample_types
        subfolder = fullfile(df_folder, pre_sample_type{1});

        for rho1 = correlation
            for t = time_span
                mat_file = fullfile(subfolder, sprintf('_%g_%d.mat', rho1, t));

                if exist(mat_file, 'file')
                    % Load existing data
                    data = load(mat_file);

                    % Add metadata
                    metadata.run_id = run_id;
                    metadata.run_num = run_num;
                    metadata.date_str = date_str;
                    metadata.run_time = datestr(now, 'yyyy-mm-dd HH:MM:SS');
                    metadata.num_replications = num_replications;
                    metadata.num_bootstrap = num_bootstrap;
                    metadata.df = df;
                    metadata.matlab_version = version;

                    % Save metadata as separate variable (cannot use -struct with -append)
                    save(mat_file, 'metadata', '-append');
                    fprintf('  ✓ Metadata added to %s\n', mat_file);
                end
            end
        end
    end
end
fprintf('Metadata successfully added to all files.\n\n');

fprintf('========================================\n');
fprintf('Output saved to: %s\n', output_folder_name);
fprintf('========================================\n');
